#############################################3 virulence test on walnut rootstock vlach; the experiment was repeated once, each time 3 plants was inoculated for each strain.
walnut_tumor <- read.table("walnut_tumor.csv", header = TRUE, sep = ",")
str(walnut_tumor)
## 'data.frame': 186 obs. of 2 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ weight : num 0.677 0.995 0.495 0 0 ...
ggplot(walnut_tumor, aes(x=strains, y=weight, color=strains)) +
geom_boxplot() +
geom_jitter() +
theme(axis.text.x=element_text(hjust=0.5,vjust=0.5,angle=90))
#ylim(0,20)
ggsave("/Users/dklabuser/limin/Dissertation_papers/dissertation/chapter3_phenoTraits/tumor_walnut.png", width = 20, height = 20, units = "in")
statistic analysis of tumor weight from walnut
walnut <- read.table("walnut_tumor.csv", header = TRUE, sep = ",")
# remove 3 non-virulent strains,and mock
walnut_cleand <- walnut %>% filter(!(strains %in% c("But002","SJ003","Tul002","Mock")))
# model for final data analysis
m2 <- lm(weight ~ strains, walnut_cleand)
qqp(residuals(m2))
## [1] 118 136
plotNormalHistogram(residuals(m2))
# log transformation to m2; 普通/一般线性模型
m2_trs <- lm(log(weight+0.5) ~ strains, walnut_cleand)
# or with following codes to check residuals
qqp(residuals(m2_trs))
## [1] 118 51
plotNormalHistogram(residuals(m2_trs))
anova(m2_trs)
## Analysis of Variance Table
##
## Response: log(weight + 0.5)
## Df Sum Sq Mean Sq F value Pr(>F)
## strains 26 46.615 1.79289 2.0666 0.004045 **
## Residuals 135 117.119 0.86755
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# categorical data use this method.
means <- emmeans(m2_trs, specs = "strains")
cld(means, alpha = 0.05, Letters = "abcdefghijklmn") # multiple comparison, tuky-test
## strains emmean SE df lower.CL upper.CL .group
## Kin002 -0.2176 0.38 135 -0.9696 0.534 a
## CL001 0.0119 0.38 135 -0.7401 0.764 ab
## C58 0.4655 0.38 135 -0.2865 1.217 ab
## Kin001 0.5280 0.38 135 -0.2240 1.280 ab
## Tul001 0.6752 0.38 135 -0.0768 1.427 ab
## Yol002 0.7271 0.38 135 -0.0249 1.479 ab
## CC001 0.7659 0.38 135 0.0138 1.518 ab
## Kin003 0.8492 0.38 135 0.0971 1.601 ab
## Gle001 0.8511 0.38 135 0.0990 1.603 ab
## But001 1.0391 0.38 135 0.2871 1.791 ab
## Sta004 1.0671 0.38 135 0.3151 1.819 ab
## Yub001 1.0740 0.38 135 0.3220 1.826 ab
## Gle002 1.0822 0.38 135 0.3302 1.834 ab
## Sta001 1.1637 0.38 135 0.4117 1.916 ab
## Yub002 1.3169 0.38 135 0.5649 2.069 ab
## Sut002 1.4098 0.38 135 0.6578 2.162 ab
## Tul003 1.4188 0.38 135 0.6668 2.171 ab
## Sta003 1.4622 0.38 135 0.7102 2.214 ab
## Tul004 1.4840 0.38 135 0.7319 2.236 ab
## Sta005 1.4941 0.38 135 0.7421 2.246 ab
## SJ002 1.5537 0.38 135 0.8017 2.306 ab
## SJ001 1.5670 0.38 135 0.8150 2.319 ab
## Teh002 1.5805 0.38 135 0.8285 2.333 ab
## Yol001 1.6059 0.38 135 0.8539 2.358 ab
## Sta002 1.8615 0.38 135 1.1095 2.614 b
## Sut001 1.9181 0.38 135 1.1661 2.670 b
## Teh001 1.9682 0.38 135 1.2162 2.720 b
##
## Results are given on the log(mu + 0.5) (not the response) scale.
## Confidence level used: 0.95
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 27 estimates
## significance level used: alpha = 0.05
statistic analysis of tumor weight from walnut virulenc test on Datura plants, the experiment was repeated in 3 blocks(3 benches). On each bench there were 5 plants inoculated for each strain. In total, each strain was inoculated to 15 datura plants
datura <- read.table("datura_tumor.csv", header = TRUE, sep = ",")
# remove 3 non-virulent strains,and mock
datura_cleand <- datura %>% filter(!(strains %in% c("But002","SJ003","Tul002","Mock")))
# mixed model for final data analysis
m2 <- lmer(weight ~ strains + block + (1|line), datura_cleand)
qqp(residuals(m2))
## [1] 333 240
plotNormalHistogram(residuals(m2))
# Do log transformation to m2
m2_trs <- lmer(log(weight+0.5) ~ strains + block + (1|line), datura_cleand)
# check residuals
qqp(residuals(m2_trs))
## [1] 240 336
plotNormalHistogram(residuals(m2_trs))
Anova(m2_trs)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: log(weight + 0.5)
## Chisq Df Pr(>Chisq)
## strains 106.2916 26 1.129e-11 ***
## block 0.0252 1 0.8739
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
means <- emmeans(m2_trs, specs = "strains")
cld(means, alpha = 0.05, Letters = "abcdefghijklmn") # multiple comparison
## strains emmean SE df lower.CL upper.CL .group
## Kin002 -0.395 0.205 75.5 -0.8038 0.0146 a
## Teh001 0.272 0.203 91.9 -0.1305 0.6746 ab
## C58 0.426 0.183 159.5 0.0642 0.7873 abc
## Gle001 0.433 0.185 137.2 0.0670 0.7990 abc
## Yub002 0.444 0.187 114.7 0.0735 0.8155 abc
## Teh002 0.488 0.179 212.1 0.1348 0.8415 abc
## Kin001 0.495 0.183 158.5 0.1335 0.8563 abc
## Kin003 0.511 0.183 162.1 0.1495 0.8722 abc
## Sut002 0.542 0.185 130.3 0.1754 0.9088 abc
## Tul004 0.727 0.188 108.6 0.3550 1.0987 bc
## Sut001 0.744 0.183 165.6 0.3832 1.1049 bc
## CC001 0.785 0.185 138.5 0.4189 1.1508 bc
## Sta002 0.823 0.180 200.1 0.4669 1.1786 bc
## SJ001 0.830 0.183 169.9 0.4696 1.1908 bc
## Gle002 0.848 0.183 159.3 0.4863 1.2093 bc
## CL001 0.861 0.203 84.9 0.4570 1.2657 bc
## Tul001 0.898 0.186 116.7 0.5296 1.2671 bc
## But001 0.899 0.183 165.9 0.5378 1.2596 bc
## Sta004 0.927 0.183 164.9 0.5660 1.2877 bc
## SJ002 0.965 0.181 191.8 0.6088 1.3221 bc
## Yol002 0.997 0.183 167.6 0.6360 1.3578 bc
## Sta003 1.029 0.185 142.6 0.6631 1.3941 bc
## Sta001 1.046 0.183 160.7 0.6846 1.4071 bc
## Yub001 1.065 0.183 162.7 0.7038 1.4262 bc
## Sta005 1.245 0.183 169.6 0.8846 1.6058 bc
## Tul003 1.323 0.186 124.1 0.9551 1.6901 c
## Yol001 1.343 0.187 116.7 0.9724 1.7134 c
##
## Degrees-of-freedom method: kenward-roger
## Results are given on the log(mu + 0.5) (not the response) scale.
## Confidence level used: 0.95
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 27 estimates
## significance level used: alpha = 0.05
8 Antibiotic resistance test Two antibiotics S10 and VA30 are very different from others, 29 out of 30 strains are resistant to them, giving 0 diameter of inhibition zone.
cb <- read.csv("carbenicillin.csv", header = TRUE, sep = ",")
cb <- cb %>% filter(strains != "Yub001") # remove data points which are 0.
# create a linear model
cbM <- lm(cb100_D ~ strains, cb)
# do diagnostic of data variance
par(mfrow = c(2,2)); plot(cbM); par(mfrow = c(1,1))
## hat values (leverages) are all = 0.3333333
## and there are no factor predictors; no plot no. 5
anova(cbM)
## Analysis of Variance Table
##
## Response: cb100_D
## Df Sum Sq Mean Sq F value Pr(>F)
## strains 28 11.5095 0.41105 28.997 < 2.2e-16 ***
## Residuals 58 0.8222 0.01418
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cb_mean <- emmeans(cbM, specs = "strains")
cld(cb_mean, alpha = 0.05, Letters = "abcdefghijklmn")
## strains emmean SE df lower.CL upper.CL .group
## Tul002 1.67 0.0687 58 1.53 1.80 a
## Kin002 1.70 0.0687 58 1.56 1.84 ab
## Sta001 1.87 0.0687 58 1.73 2.00 abc
## SJ003 1.88 0.0687 58 1.75 2.02 abc
## SJ002 1.94 0.0687 58 1.80 2.08 abcd
## Kin003 1.97 0.0687 58 1.83 2.10 abcd
## Sut002 1.97 0.0687 58 1.83 2.10 abcd
## Gle001 2.00 0.0687 58 1.86 2.14 abcde
## Tul004 2.00 0.0687 58 1.86 2.14 abcde
## Kin001 2.00 0.0687 58 1.86 2.14 abcde
## SJ001 2.03 0.0687 58 1.89 2.16 abcde
## But001 2.03 0.0687 58 1.90 2.17 abcde
## Sut001 2.03 0.0687 58 1.90 2.17 abcde
## Tul003 2.07 0.0687 58 1.93 2.20 bcde
## CC001 2.10 0.0687 58 1.96 2.24 cde
## Yub002 2.13 0.0687 58 2.00 2.27 cde
## Gle002 2.17 0.0687 58 2.03 2.30 cdef
## CL001 2.17 0.0687 58 2.03 2.30 cdef
## Sta002 2.20 0.0687 58 2.06 2.34 cdef
## Sta004 2.23 0.0687 58 2.10 2.37 cdef
## Yol001 2.23 0.0687 58 2.10 2.37 cdef
## Teh002 2.27 0.0687 58 2.13 2.40 def
## Tul001 2.27 0.0687 58 2.13 2.40 def
## Yol002 2.27 0.0687 58 2.13 2.40 def
## Teh001 2.37 0.0687 58 2.23 2.50 efg
## But002 2.53 0.0687 58 2.40 2.67 fg
## Sta003 2.67 0.0687 58 2.53 2.80 g
## Sta005 3.13 0.0687 58 3.00 3.27 h
## C58 3.40 0.0687 58 3.26 3.54 h
##
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 29 estimates
## significance level used: alpha = 0.05
chl <- read.csv("chloramphenicol.csv", header = TRUE, sep = ",")
chl <- chl %>% filter(strains != "Kin002")
str(chl)
## 'data.frame': 87 obs. of 2 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ c30_D : num 2.4 2.5 2.5 0.8 1 1 1 1 0.9 1.1 ...
# create a linear model
chlM <- lm(c30_D ~ strains, chl)
# do diagnostic of data variance
par(mfrow = c(2,2)); plot(chlM); par(mfrow = c(1,1))
## hat values (leverages) are all = 0.3333333
## and there are no factor predictors; no plot no. 5
anova(chlM)
## Analysis of Variance Table
##
## Response: c30_D
## Df Sum Sq Mean Sq F value Pr(>F)
## strains 28 14.0940 0.50336 62.56 < 2.2e-16 ***
## Residuals 58 0.4667 0.00805
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
chl_mean <- emmeans(chlM, specs = "strains")
cld(chl_mean, alpha = 0.05, Letters = "abcdefghijklmn")
## strains emmean SE df lower.CL upper.CL .group
## Kin003 0.700 0.0518 58 0.596 0.804 a
## Tul003 0.733 0.0518 58 0.630 0.837 ab
## Tul004 0.767 0.0518 58 0.663 0.870 abc
## Teh001 0.800 0.0518 58 0.696 0.904 abcd
## Sta003 0.833 0.0518 58 0.730 0.937 abcde
## SJ003 0.867 0.0518 58 0.763 0.970 abcdef
## But002 0.933 0.0518 58 0.830 1.037 abcdefg
## Tul001 0.933 0.0518 58 0.830 1.037 abcdefg
## Teh002 0.933 0.0518 58 0.830 1.037 abcdefg
## Sut002 0.933 0.0518 58 0.830 1.037 abcdefg
## Sta005 0.967 0.0518 58 0.863 1.070 abcdefg
## CC001 0.967 0.0518 58 0.863 1.070 abcdefg
## Sut001 0.967 0.0518 58 0.863 1.070 abcdefg
## Gle002 1.000 0.0518 58 0.896 1.104 bcdefg
## Sta002 1.033 0.0518 58 0.930 1.137 cdefg
## Yol001 1.033 0.0518 58 0.930 1.137 cdefg
## Yol002 1.033 0.0518 58 0.930 1.137 cdefg
## Gle001 1.033 0.0518 58 0.930 1.137 cdefg
## SJ001 1.067 0.0518 58 0.963 1.170 defg
## Sta001 1.067 0.0518 58 0.963 1.170 defg
## CL001 1.067 0.0518 58 0.963 1.170 defg
## SJ002 1.067 0.0518 58 0.963 1.170 defg
## Yub001 1.100 0.0518 58 0.996 1.204 efg
## Kin001 1.133 0.0518 58 1.030 1.237 fg
## Yub002 1.167 0.0518 58 1.063 1.270 g
## C58 1.500 0.0518 58 1.396 1.604 h
## Tul002 1.900 0.0518 58 1.796 2.004 i
## Sta004 2.200 0.0518 58 2.096 2.304 j
## But001 2.467 0.0518 58 2.363 2.570 j
##
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 29 estimates
## significance level used: alpha = 0.05
cip <- read.csv("ciprofloxacin.csv", header = TRUE, sep = ",")
str(cip)
## 'data.frame': 90 obs. of 2 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ cip_D : num 2.8 3 2.9 3 3.1 3 3 3.1 3.2 3.1 ...
# create a linear model
cipM <- lm(cip_D ~ strains, cip)
# do diagnostic of data variance
par(mfrow = c(2,2)); plot(cipM); par(mfrow = c(1,1))
## hat values (leverages) are all = 0.3333333
## and there are no factor predictors; no plot no. 5
anova(cipM)
## Analysis of Variance Table
##
## Response: cip_D
## Df Sum Sq Mean Sq F value Pr(>F)
## strains 29 14.1891 0.48928 34.739 < 2.2e-16 ***
## Residuals 60 0.8451 0.01408
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cip_mean <- emmeans(cipM, specs = "strains")
cld(cip_mean, alpha = 0.05, Letters = "abcdefghijklmn")
## strains emmean SE df lower.CL upper.CL .group
## Kin002 1.90 0.0685 60 1.76 2.04 a
## Tul002 2.57 0.0685 60 2.43 2.70 b
## Kin003 2.63 0.0685 60 2.50 2.77 bc
## Gle001 2.67 0.0685 60 2.53 2.80 bcd
## SJ003 2.70 0.0685 60 2.57 2.84 bcde
## Sta002 2.83 0.0685 60 2.70 2.97 bcdef
## But001 2.90 0.0685 60 2.76 3.04 bcdefg
## Yub002 3.00 0.0685 60 2.86 3.14 cdefg
## CL001 3.03 0.0685 60 2.90 3.17 defgh
## Sta004 3.03 0.0685 60 2.90 3.17 defgh
## But002 3.03 0.0685 60 2.90 3.17 defgh
## SJ001 3.03 0.0685 60 2.90 3.17 defgh
## Sut002 3.07 0.0685 60 2.93 3.20 efghi
## Teh001 3.10 0.0685 60 2.96 3.24 fghi
## CC001 3.10 0.0685 60 2.96 3.24 fghi
## Sta003 3.10 0.0685 60 2.96 3.24 fghi
## Teh002 3.13 0.0685 60 3.00 3.27 fghij
## Sta001 3.20 0.0685 60 3.06 3.34 fghijk
## Sut001 3.23 0.0685 60 3.10 3.37 ghijkl
## Kin001 3.40 0.0685 60 3.26 3.54 hijklm
## Sta005 3.43 0.0685 60 3.30 3.57 ijklm
## Tul001 3.43 0.0685 60 3.30 3.57 ijklm
## SJ002 3.43 0.0685 60 3.30 3.57 ijklm
## Tul003 3.50 0.0685 60 3.36 3.64 jklmna
## Yol002 3.50 0.0685 60 3.36 3.64 jklmna
## Tul004 3.53 0.0685 60 3.40 3.67 klmna
## Gle002 3.60 0.0685 60 3.46 3.74 lmna
## Yol001 3.63 0.0685 60 3.50 3.77 mna
## Yub001 3.67 0.0685 60 3.53 3.80 mna
## C58 3.83 0.0685 60 3.70 3.97 na
##
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 30 estimates
## significance level used: alpha = 0.05
ery <- read.csv("erythromycin.csv", header = TRUE, sep = ",")
str(ery)
## 'data.frame': 90 obs. of 2 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ e15_D : num 1.1 1 1 0.8 0.7 0.6 1.2 1.2 1.3 1.1 ...
# create a linear model
eryM <- lm(e15_D ~ strains, ery)
# do diagnostic of data variance
par(mfrow = c(2,2)); plot(eryM); par(mfrow = c(1,1))
## hat values (leverages) are all = 0.3333333
## and there are no factor predictors; no plot no. 5
anova(eryM)
## Analysis of Variance Table
##
## Response: e15_D
## Df Sum Sq Mean Sq F value Pr(>F)
## strains 29 4.7779 0.16476 18.306 < 2.2e-16 ***
## Residuals 60 0.5400 0.00900
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ery_mean <- emmeans(eryM, specs = "strains")
cld(ery_mean, alpha = 0.05, Letters = "abcdefghijklmn")
## strains emmean SE df lower.CL upper.CL .group
## But002 0.700 0.0548 60 0.590 0.810 a
## Kin002 0.767 0.0548 60 0.657 0.876 ab
## Sta001 0.867 0.0548 60 0.757 0.976 abc
## Sta005 0.867 0.0548 60 0.757 0.976 abc
## Sut001 0.900 0.0548 60 0.790 1.010 abcd
## Gle001 0.933 0.0548 60 0.824 1.043 abcde
## Kin001 0.933 0.0548 60 0.824 1.043 abcde
## Tul003 0.967 0.0548 60 0.857 1.076 abcde
## SJ002 0.967 0.0548 60 0.857 1.076 abcde
## Gle002 0.967 0.0548 60 0.857 1.076 abcde
## Yol001 1.000 0.0548 60 0.890 1.110 abcde
## Sta003 1.000 0.0548 60 0.890 1.110 abcde
## But001 1.033 0.0548 60 0.924 1.143 bcde
## Teh001 1.067 0.0548 60 0.957 1.176 bcdef
## Tul004 1.067 0.0548 60 0.957 1.176 bcdef
## Yol002 1.067 0.0548 60 0.957 1.176 bcdef
## Yub002 1.067 0.0548 60 0.957 1.176 bcdef
## SJ001 1.067 0.0548 60 0.957 1.176 bcdef
## Tul001 1.100 0.0548 60 0.990 1.210 cdef
## Kin003 1.100 0.0548 60 0.990 1.210 cdef
## Sta002 1.100 0.0548 60 0.990 1.210 cdef
## CL001 1.133 0.0548 60 1.024 1.243 cdefg
## Teh002 1.133 0.0548 60 1.024 1.243 cdefg
## Sta004 1.167 0.0548 60 1.057 1.276 cdefg
## SJ003 1.200 0.0548 60 1.090 1.310 defg
## CC001 1.233 0.0548 60 1.124 1.343 efg
## Sut002 1.233 0.0548 60 1.124 1.343 efg
## Tul002 1.367 0.0548 60 1.257 1.476 fg
## C58 1.433 0.0548 60 1.324 1.543 g
## Yub001 2.000 0.0548 60 1.890 2.110 h
##
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 30 estimates
## significance level used: alpha = 0.05
rif <- read.csv("rifampin.csv", header = TRUE, sep = ",")
rif <- rif %>% filter(strains != "Sta003")
str(rif)
## 'data.frame': 87 obs. of 2 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ rif_D : num 1.3 1.2 1.3 1.2 1.2 1.3 1.4 1.3 1.3 1.2 ...
# create a linear model
rifM <- lm(rif_D ~ strains, rif)
# do diagnostic of data variance
par(mfrow = c(2,2)); plot(rifM); par(mfrow = c(1,1))
## hat values (leverages) are all = 0.3333333
## and there are no factor predictors; no plot no. 5
anova(rifM)
## Analysis of Variance Table
##
## Response: rif_D
## Df Sum Sq Mean Sq F value Pr(>F)
## strains 28 1.22684 0.043816 10.269 8.545e-14 ***
## Residuals 58 0.24747 0.004267
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rif_mean <- emmeans(rifM, specs = "strains")
cld(rif_mean, alpha = 0.05, Letters = "abcdefghi")
## strains emmean SE df lower.CL upper.CL .group
## Sta004 1.13 0.0377 58 1.06 1.21 a
## Sta005 1.17 0.0377 58 1.09 1.24 ab
## Sut001 1.17 0.0377 58 1.09 1.24 ab
## Gle001 1.17 0.0377 58 1.09 1.24 ab
## Sta001 1.20 0.0377 58 1.12 1.28 abc
## But002 1.23 0.0377 58 1.16 1.31 abcd
## Kin002 1.23 0.0377 58 1.16 1.31 abcd
## Teh001 1.23 0.0377 58 1.16 1.31 abcd
## Tul003 1.23 0.0377 58 1.16 1.31 abcd
## Tul004 1.23 0.0377 58 1.16 1.31 abcd
## SJ001 1.25 0.0377 58 1.17 1.33 abcd
## But001 1.27 0.0377 58 1.19 1.34 abcd
## CL001 1.27 0.0377 58 1.19 1.34 abcd
## SJ003 1.27 0.0377 58 1.19 1.34 abcd
## Yol001 1.27 0.0377 58 1.19 1.34 abcd
## Gle002 1.30 0.0377 58 1.22 1.38 abcde
## Tul001 1.30 0.0377 58 1.22 1.38 abcde
## SJ002 1.32 0.0377 58 1.24 1.39 abcde
## CC001 1.33 0.0377 58 1.26 1.41 abcdef
## Teh002 1.33 0.0377 58 1.26 1.41 abcdef
## Yol002 1.33 0.0377 58 1.26 1.41 abcdef
## Sut002 1.37 0.0377 58 1.29 1.44 bcdef
## Yub001 1.40 0.0377 58 1.32 1.48 cdefg
## Sta002 1.43 0.0377 58 1.36 1.51 defg
## Kin001 1.43 0.0377 58 1.36 1.51 defg
## Yub002 1.50 0.0377 58 1.42 1.58 efg
## Tul002 1.53 0.0377 58 1.46 1.61 fg
## Kin003 1.53 0.0377 58 1.46 1.61 fg
## C58 1.60 0.0377 58 1.52 1.68 g
##
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 29 estimates
## significance level used: alpha = 0.05
tec <- read.csv("tetracycline.csv", header = TRUE, sep = ",")
str(tec)
## 'data.frame': 90 obs. of 2 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ te30_D : num 3.4 3.5 3.3 2.3 2.2 2.3 3 2.9 3.2 3.1 ...
# create a linear model
tecM <- lm(te30_D ~ strains, tec)
# do diagnostic of data variance
par(mfrow = c(2,2)); plot(tecM); par(mfrow = c(1,1))
## hat values (leverages) are all = 0.3333333
## and there are no factor predictors; no plot no. 5
anova(tecM)
## Analysis of Variance Table
##
## Response: te30_D
## Df Sum Sq Mean Sq F value Pr(>F)
## strains 29 14.7130 0.50735 39.445 < 2.2e-16 ***
## Residuals 60 0.7717 0.01286
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tec_mean <- emmeans(tecM, specs = "strains")
cld(tec_mean, alpha = 0.05, Letters = "abcdefghiklmnopqrst")
## strains emmean SE df lower.CL upper.CL .group
## Sta001 2.23 0.0655 60 2.10 2.36 a
## But002 2.27 0.0655 60 2.14 2.40 ab
## Sut001 2.27 0.0655 60 2.14 2.40 ab
## Gle001 2.37 0.0655 60 2.24 2.50 abc
## Tul004 2.43 0.0655 60 2.30 2.56 abc
## SJ001 2.45 0.0655 60 2.32 2.58 abc
## Sta002 2.47 0.0655 60 2.34 2.60 abc
## SJ002 2.50 0.0655 60 2.37 2.63 abcd
## Sut002 2.50 0.0655 60 2.37 2.63 abcd
## Tul001 2.50 0.0655 60 2.37 2.63 abcd
## Teh001 2.60 0.0655 60 2.47 2.73 bcde
## Teh002 2.63 0.0655 60 2.50 2.76 cdef
## Yol001 2.70 0.0655 60 2.57 2.83 cdefg
## Gle002 2.70 0.0655 60 2.57 2.83 cdefg
## Sta003 2.83 0.0655 60 2.70 2.96 defgh
## Yol002 2.87 0.0655 60 2.74 3.00 efgh
## SJ003 2.93 0.0655 60 2.80 3.06 efghi
## Kin003 2.97 0.0655 60 2.84 3.10 fghi
## Sta005 3.03 0.0655 60 2.90 3.16 ghik
## CC001 3.03 0.0655 60 2.90 3.16 ghik
## Tul002 3.03 0.0655 60 2.90 3.16 ghik
## Kin001 3.07 0.0655 60 2.94 3.20 hikl
## CL001 3.10 0.0655 60 2.97 3.23 hikl
## Tul003 3.10 0.0655 60 2.97 3.23 hikl
## Yub002 3.27 0.0655 60 3.14 3.40 ikl
## Sta004 3.33 0.0655 60 3.20 3.46 klm
## Kin002 3.37 0.0655 60 3.24 3.50 klmn
## But001 3.40 0.0655 60 3.27 3.53 lmn
## C58 3.63 0.0655 60 3.50 3.76 mn
## Yub001 3.70 0.0655 60 3.57 3.83 n
##
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 30 estimates
## significance level used: alpha = 0.05
antibiotics <- read.csv("antibiotics.csv", header = TRUE, sep = ",")
str(antibiotics)
## 'data.frame': 90 obs. of 9 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ S10 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ RA5 : num 1.3 1.2 1.3 1.2 1.2 1.3 1.4 1.3 1.3 1.2 ...
## $ TE30 : num 3.4 3.5 3.3 2.3 2.2 2.3 3 2.9 3.2 3.1 ...
## $ CB100 : num 2.1 1.9 2.1 2.7 2.5 2.4 2 2.2 2.1 2.1 ...
## $ E15 : num 1.1 1 1 0.8 0.7 0.6 1.2 1.2 1.3 1.1 ...
## $ VA30 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CIP : num 2.8 3 2.9 3 3.1 3 3 3.1 3.2 3.1 ...
## $ C30 : num 2.4 2.5 2.5 0.8 1 1 1 1 0.9 1.1 ...
# save streptomycin to a csv table
s10 <- antibiotics %>% select(1,2)
#write.table(s10, file = "/Users/dklabuser/limin/data_analysis/phenotypic_data/csv/streptomycin.csv", sep = ",", row.names = FALSE)
# save vancomycin to a csv table
va30 <- antibiotics %>% select(1,7)
#write.table(va30, file = "/Users/dklabuser/limin/data_analysis/phenotypic_data/csv/vancomycin.csv", sep = ",", row.names = FALSE)
K84 test
k84 <- read.csv("k84.csv", header = TRUE, sep = ",")
str(k84)
## 'data.frame': 90 obs. of 2 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ k84_D : num 5.32 5.32 5.31 0 0 0 4.83 4.81 4.81 0 ...
k84 <- k84 %>% filter(k84_D != 0)
# create a linear model
k84M <- lm(k84_D ~ strains, k84)
# do diagnostic of data variance
par(mfrow = c(2,2)); plot(k84M); par(mfrow = c(1,1))
## hat values (leverages) are all = 0.3333333
## and there are no factor predictors; no plot no. 5
anova(k84M)
## Analysis of Variance Table
##
## Response: k84_D
## Df Sum Sq Mean Sq F value Pr(>F)
## strains 14 18.9287 1.3520 108.13 < 2.2e-16 ***
## Residuals 30 0.3751 0.0125
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
k84_mean <- emmeans(k84M, specs = "strains")
cld(k84_mean, alpha = 0.05, Letters = "abcdefg")
## strains emmean SE df lower.CL upper.CL .group
## Tul003 2.87 0.0646 30 2.73 3.00 a
## Yub001 4.51 0.0646 30 4.38 4.64 b
## Sta005 4.52 0.0646 30 4.39 4.65 b
## CC001 4.82 0.0646 30 4.68 4.95 bc
## Sta003 5.11 0.0646 30 4.98 5.24 cd
## Tul001 5.13 0.0646 30 5.00 5.27 cde
## Sta001 5.28 0.0646 30 5.15 5.41 def
## Kin003 5.29 0.0646 30 5.16 5.43 def
## Yol001 5.31 0.0646 30 5.18 5.45 def
## Yol002 5.31 0.0646 30 5.18 5.45 def
## But001 5.32 0.0646 30 5.18 5.45 def
## C58 5.34 0.0646 30 5.21 5.48 def
## Sta004 5.41 0.0646 30 5.27 5.54 def
## SJ002 5.45 0.0646 30 5.32 5.59 ef
## SJ001 5.53 0.0646 30 5.39 5.66 f
##
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 15 estimates
## significance level used: alpha = 0.05
motility test
mot <- read.csv("motility.csv", header = TRUE, sep = ",")
str(mot)
## 'data.frame': 90 obs. of 2 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ mot_48h: num 3.32 3.31 3.36 3.5 3.48 3.51 3.71 3.7 3.65 3.2 ...
# create a linear model
motM <- lm(mot_48h ~ strains, mot)
# do diagnostic of data variance
par(mfrow = c(2,2)); plot(motM); par(mfrow = c(1,1))
## hat values (leverages) are all = 0.3333333
## and there are no factor predictors; no plot no. 5
anova(motM)
## Analysis of Variance Table
##
## Response: mot_48h
## Df Sum Sq Mean Sq F value Pr(>F)
## strains 29 7.1722 0.24732 48.304 < 2.2e-16 ***
## Residuals 60 0.3072 0.00512
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mot_mean <- emmeans(motM, specs = "strains")
cld(mot_mean, alpha = 0.05, Letters = "abcdefghiklmnopqrst")
## strains emmean SE df lower.CL upper.CL .group
## CL001 3.18 0.0413 60 3.09 3.26 a
## Kin002 3.26 0.0413 60 3.18 3.35 ab
## Yub002 3.27 0.0413 60 3.19 3.36 abc
## But001 3.33 0.0413 60 3.25 3.41 abcd
## Tul003 3.39 0.0413 60 3.31 3.47 abcde
## Tul002 3.44 0.0413 60 3.36 3.53 bcdef
## Teh002 3.46 0.0413 60 3.38 3.55 bcdefg
## Gle002 3.49 0.0413 60 3.41 3.57 bcdefgh
## But002 3.50 0.0413 60 3.41 3.58 cdefgh
## Teh001 3.50 0.0413 60 3.42 3.58 cdefgh
## Kin001 3.50 0.0413 60 3.42 3.59 defgh
## Sut001 3.51 0.0413 60 3.43 3.59 defgh
## Tul001 3.51 0.0413 60 3.43 3.59 defgh
## Kin003 3.59 0.0413 60 3.50 3.67 efghi
## C58 3.59 0.0413 60 3.51 3.68 efghi
## Gle001 3.62 0.0413 60 3.54 3.70 fghik
## Sta005 3.63 0.0413 60 3.54 3.71 fghik
## CC001 3.69 0.0413 60 3.60 3.77 ghikl
## Sta003 3.71 0.0413 60 3.62 3.79 hikl
## Sut002 3.71 0.0413 60 3.63 3.79 hikl
## Tul004 3.75 0.0413 60 3.66 3.83 ikl
## Sta001 3.75 0.0413 60 3.67 3.84 ikl
## Sta002 3.76 0.0413 60 3.68 3.85 ikl
## SJ002 3.81 0.0413 60 3.72 3.89 ikl
## Yol001 3.85 0.0413 60 3.76 3.93 klm
## SJ001 3.88 0.0413 60 3.79 3.96 lm
## Yol002 4.07 0.0413 60 3.98 4.15 mn
## Sta004 4.11 0.0413 60 4.03 4.19 n
## SJ003 4.19 0.0413 60 4.11 4.28 n
## Yub001 4.46 0.0413 60 4.37 4.54 o
##
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 30 estimates
## significance level used: alpha = 0.05
GROWTH RATE ANALYSIS
tsb1 <- read.table("box3_tsb_48hrs.csv", header = FALSE, sep = ",")
# extract table only including the obsorbance.
Table <- matrix(0, 98, 146)
for(i in 2:99){
colname <- paste("V", i, sep = "")
newcolumn <- tsb1[40:185,colname]
Table[i-1,] <- newcolumn
}
Table <- t(Table)
str(Table)
## chr [1:146, 1:98] "Time" "0:29:20" "0:59:20" "1:29:20" "1:59:20" "2:29:20" ...
Table <- as.data.frame(Table)
# clean table, and rename column names
Table <- Table[,-2]
#str(Table)
namescol <- Table[1,]
colnames(Table) <- namescol
Tab1 <- Table[-1,]
# convert Time format to secons then to hours
Tab1$Time <- (period_to_seconds(hms(Tab1$Time))/60/60)
# change column name from Time to time, and column A4 as blank
names(Tab1)[names(Tab1) == "Time"] <- "time"
names(Tab1)[names(Tab1) == "H12"] <- "blank"
# create a vector storing wells where agro grows and time and blank column
bch1 <- c("time","blank","A1","A2","A3","A5","A6","A7","A9","A10","A11","C1","C2","C3","C5","C6","C7","C9","C10","C11","E1","E2","E3","E5","E6","E7","G1","G2","G3","G5","G6","G7")
# remove other extra blank columns
Tab1 <- Tab1[,colnames(Tab1) %in% bch1]
bch2 <- c("blank","A1","A2","A3","A5","A6","A7","A9","A10","A11","C1","C2","C3","C5","C6","C7","C9","C10","C11","E1","E2","E3","E5","E6","E7","G1","G2","G3","G5","G6","G7")
# time column is already double type, the following for loop change convert the rest table contents to numberic
for (i in bch2){
Tab1[i] <- as.numeric(unlist(Tab1[i]))
}
# After the previous preparation, Tab1 one is ready for growthcurver analysis
Tab1$time <- round(Tab1$time, 2)
Tab1 <- Tab1[1:110,]
#colnames(Tab1)
# analyze the whole plate and remove the blank row
t1 <- SummarizeGrowthByPlate(Tab1, bg_correct = "blank")[-31,]
tsb_box3 <- c("C58_1","C58_2","C58_3","SJ003_1","SJ003_2","SJ003_3","Sta004_1","Sta004_2","Sta004_3","Sut002_1","Sut002_2","Sut002_3","Sta001_1","Sta001_2","Sta001_3","Sta005_1","Sta005_2","Sta005_3","SJ001_1","SJ001_2","SJ001_3","Sta002_1","Sta002_2","Sta002_3","SJ002_1","SJ002_2","SJ002_3","Sta003_1","Sta003_2","Sta003_3")
t1$sample <- tsb_box3
#write.table(t1, file = "/Users/dklabuser/limin/data_analysis/phenotypic_data/round3_growth_curveData/growthcurver_outputs/tsb_box3.csv", row.names = FALSE, col.names = TRUE, sep = ",")
The following two chunks will run the 6 csv files at one time. box1, box2, box3, 10 agro strains for each box, the order of 10 strains in both tsb and AB medium are the same.
box3 <- c("C58","C58","C58","SJ003","SJ003","SJ003","Sta004","Sta004","Sta004","Sut002","Sut002","Sut002","Sta001","Sta001","Sta001","Sta005","Sta005","Sta005","SJ001","SJ001","SJ001","Sta002","Sta002","Sta002","SJ002","SJ002","SJ002","Sta003","Sta003","Sta003")
box2 <- c("Tul001","Tul001","Tul001","Teh001","Teh001","Teh001","Yub001","Yub001","Yub001","Tul002","Tul002","Tul002","Teh002","Teh002","Teh002","Yub002","Yub002","Yub002","Tul003","Tul003","Tul003","Yol001","Yol001","Yol001","Tul004","Tul004","Tul004","Yol002","Yol002","Yol002")
box1 <- c("CC001","CC001","CC001","Sut001","Sut001","Sut001","Kin002","Kin002","Kin002","CL001","CL001","CL001","Gle001","Gle001","Gle001","Kin003","Kin003","Kin003","But001","But001","But001","Gle002","Gle002","Gle002","But002","But002","But002","Kin001","Kin001","Kin001")
bx <-list(box1=box1, box2=box2,box3=box3)
# create a vector for 6 excel tables
tabnames <- c("box1_AB_72hrs","box2_AB_72hrs","box3_AB_72hrs","box1_tsb_72hrs","box2_tsb_72hrs","box3_tsb_48hrs")
for (tab in tabnames){
a <- unlist(strsplit(tab,"_"))[1] # a will be assigned a vector.
c <- a # save a to a new object called c, which is used as a name when saving results.
b <- unlist(strsplit(tab,"_"))[2]
filen <- paste0("/Users/dklabuser/limin/data_analysis/phenotypic_data/round3_growth_curveData/", tab, ".csv")
tsb1 <- read.table(filen, header = FALSE, sep = ",")
# prepare path to save the data
lk <- "/Users/dklabuser/limin/data_analysis/phenotypic_data/round3_growth_curveData/growthcurver_outputs/"
pathTable <- paste0(lk,c,"_",b,".csv")
pathPlot <- paste0(lk,c,"_",b,".png")
# extract table only including the absorbance.
Table <- matrix(0, 98, 146)
for(i in 2:99){
colname <- paste("V", i, sep = "")
newcolumn <- tsb1[40:185,colname]
Table[i-1,] <- newcolumn
}
Table <- t(Table)
Table <- as.data.frame(Table)
# clean table, and rename column names
Table <- Table[,-2]
namescol <- Table[1,]
colnames(Table) <- namescol
Tab1 <- Table[-1,]
# convert Time format to seconds, then to hours
Tab1$Time <- (period_to_seconds(hms(Tab1$Time))/60/60)
# change column name from Time to time, and last column as blank
names(Tab1)[names(Tab1) == "Time"] <- "time"
names(Tab1)[names(Tab1) == "H12"] <- "blank"
# create a vector storing wells where agro grows and time and blank column, this is based on my exp design, I culture 10 isolate each time.
bch1 <- c("time","blank","A1","A2","A3","A5","A6","A7","A9","A10","A11","C1","C2","C3","C5","C6","C7","C9","C10","C11","E1","E2","E3","E5","E6","E7","G1","G2","G3","G5","G6","G7")
# remove other extra blank columns
Tab1 <- Tab1[,colnames(Tab1) %in% bch1]
# create bch2 vector to convert them to numneric data type, using following for loop.
bch2 <- c("blank","A1","A2","A3","A5","A6","A7","A9","A10","A11","C1","C2","C3","C5","C6","C7","C9","C10","C11","E1","E2","E3","E5","E6","E7","G1","G2","G3","G5","G6","G7")
# time column is already double type, the following for loop change convert the rest table contents to numberic
for (i in bch2){
#print(i)
Tab1[i] <- as.numeric(unlist(Tab1[i]))
}
# After the previous preparation, Tab1 one is ready for growthcurve analysis
Tab1$time <- round(Tab1$time, 2)
Tab1 <- Tab1[1:110,]
# analyze the whole plate and remove the blank row
t1 <- SummarizeGrowthByPlate(Tab1, bg_correct = "blank")[-31,]
SummarizeGrowthByPlate(Tab1, plot_fit = TRUE, plot_file = pathPlot) # save to pdf failed.
# this loop is to assign real sample names to vector a created in the very beginning.
for (i in 1:3){
nm <- names(bx)[i]
ifelse (a == nm,a <- unlist(bx[i]), print("KO")) # if statement in r needs to specify both true and false condition
}
# rename sample column using real sample name created from for loop, this column actually becomes a row names column.
t1$sample <- a
# save the result dataframe to a table
#write.table(t1, file = pathTable, row.names = FALSE, col.names = TRUE, sep = ",")
}
## Warning in grSoftVersion(): unable to load shared object '/Library/Frameworks/R.framework/Resources/modules//R_X11.so':
## dlopen(/Library/Frameworks/R.framework/Resources/modules//R_X11.so, 6): Library not loaded: /opt/X11/lib/libSM.6.dylib
## Referenced from: /Library/Frameworks/R.framework/Versions/4.0/Resources/modules/R_X11.so
## Reason: image not found
## Warning in cairoVersion(): unable to load shared object '/Library/Frameworks/R.framework/Resources/library/grDevices/libs//cairo.so':
## dlopen(/Library/Frameworks/R.framework/Resources/library/grDevices/libs//cairo.so, 6): Library not loaded: /opt/X11/lib/libXrender.1.dylib
## Referenced from: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/grDevices/libs/cairo.so
## Reason: image not found
## Warning in grDevices::cairo_pdf(plot_file, width = 12, height = 8): failed to
## load cairo DLL
## [1] "KO"
## [1] "KO"
## Warning in grDevices::cairo_pdf(plot_file, width = 12, height = 8): failed to
## load cairo DLL
## [1] "KO"
## [1] "KO"
## Warning in grDevices::cairo_pdf(plot_file, width = 12, height = 8): failed to
## load cairo DLL
## [1] "KO"
## [1] "KO"
## Warning in grDevices::cairo_pdf(plot_file, width = 12, height = 8): failed to
## load cairo DLL
## [1] "KO"
## [1] "KO"
## Warning in grDevices::cairo_pdf(plot_file, width = 12, height = 8): failed to
## load cairo DLL
## [1] "KO"
## [1] "KO"
## Warning in grDevices::cairo_pdf(plot_file, width = 12, height = 8): failed to
## load cairo DLL
## [1] "KO"
## [1] "KO"
the above code able to do the analysis, however, failed to generate pdf plot, and only 5 graphs can be printed in R.
plot growth rate and doubling time. First merge result tables from the same medium
# prepare 3 data frames of tsb medium
b1tsb <- read.table("box1_tsb.csv", header = TRUE, sep = ",")
b2tsb <- read.table("box2_tsb.csv", header = TRUE, sep = ",")
b3tsb <- read.table("box3_tsb.csv", header = TRUE, sep = ",")
b12 <- rbind(b1tsb,b2tsb)
b123 <- rbind(b12, b3tsb)
str(b123)
## 'data.frame': 90 obs. of 10 variables:
## $ sample: chr "CC001" "CC001" "CC001" "Sut001" ...
## $ k : num 0.825 0.399 0.427 0.641 0.554 ...
## $ n0 : num 0.02451 0.00043 0.00272 0.0012 0.0021 ...
## $ r : num 0.157 0.433 0.307 0.339 0.3 ...
## $ t_mid : num 22.2 15.8 16.4 18.5 18.6 ...
## $ t_gen : num 4.42 1.6 2.26 2.04 2.31 ...
## $ auc_l : num 26.9 15.6 16.5 23.4 20.2 ...
## $ auc_e : num 26.4 15.6 16.3 23.4 20.1 ...
## $ sigma : num 0.0608 0.00865 0.03089 0.0399 0.0407 ...
## $ note : logi NA NA NA NA NA NA ...
ggplot(b123, aes(x=sample, y= r, color = sample)) +
geom_boxplot()+
geom_jitter() +
ggtitle("growth rate of 30 agro in TSB medium") +
theme(axis.text.x=element_text(hjust=0.5,vjust=0.5,angle=90))
#ggsave(filename = "/Users/dklabuser/limin/data_analysis/phenotypic_data/round3_growth_curveData/growthcurver_outputs/tsb_GR.png", units = "cm", dpi = 3000)
ggplot(b123, aes(x=sample, y= t_gen, color = sample)) +
geom_boxplot()+
geom_jitter() +
ggtitle("double time of 30 agro in TSB medium") +
theme(axis.text.x=element_text(hjust=0.5,vjust=0.5,angle=90))
#ggsave(filename = "/Users/dklabuser/limin/data_analysis/phenotypic_data/round3_growth_curveData/growthcurver_outputs/tsb_dt.png", units = "cm", dpi = 3000)
# prepare 3 data frames of AB medium
b1AB <- read.table("box1_AB.csv", header = TRUE, sep = ",")
b2AB <- read.table("box2_AB.csv", header = TRUE, sep = ",")
b3AB <- read.table("box3_AB.csv", header = TRUE, sep = ",")
b12AB <- rbind(b1AB,b2AB)
b123AB <- rbind(b12AB, b3AB)
str(b123AB)
## 'data.frame': 90 obs. of 10 variables:
## $ sample: chr "CC001" "CC001" "CC001" "Sut001" ...
## $ k : num 0.184 0.23 0.237 0.341 0.321 ...
## $ n0 : num 1.91e-06 9.24e-05 1.30e-04 2.92e-04 1.66e-04 ...
## $ r : num 0.492 0.325 0.311 0.292 0.326 ...
## $ t_mid : num 23.3 24.1 24.1 24.2 23.2 ...
## $ t_gen : num 1.41 2.14 2.23 2.37 2.13 ...
## $ auc_l : num 5.83 7.1 7.32 10.51 10.2 ...
## $ auc_e : num 5.8 7.06 7.29 10.47 10.17 ...
## $ sigma : num 0.0112 0.0126 0.0124 0.0118 0.0108 ...
## $ note : logi NA NA NA NA NA NA ...
ggplot(b123AB, aes(x=sample, y= r, color = sample)) +
geom_boxplot()+
geom_jitter() +
ggtitle("growth rate of 30 agro in AB medium") +
theme(axis.text.x=element_text(hjust=0.5,vjust=0.5,angle=90))
#ggsave(filename = "/Users/dklabuser/limin/data_analysis/phenotypic_data/round3_growth_curveData/growthcurver_outputs/AB_GR.png", units = "cm", dpi = 3000)
ggplot(b123AB, aes(x=sample, y= t_gen, color = sample)) +
geom_boxplot()+
geom_jitter() +
ggtitle("double time of 30 agro in AB medium") +
theme(axis.text.x=element_text(hjust=0.5,vjust=0.5,angle=90))
#ggsave(filename = "/Users/dklabuser/limin/data_analysis/phenotypic_data/round3_growth_curveData/growthcurver_outputs/AB_dt.png", units = "cm", dpi = 3000)
PHENOTYPIC DATA CORRELATION ANALYSIS ################################################## prepare data for correlation analysis. Extract mean of each phenotypic data and save as a dataframe, then scatter plot two of them to see if there is linearity between two traits.
#virulence
datura <- read.table("datura_tumor.csv", header = TRUE, sep = ",")
str(datura)
## 'data.frame': 465 obs. of 4 variables:
## $ strains: chr "But001" "But001" "But001" "But001" ...
## $ weight : num 4.86 2.75 2.41 2.53 5.87 ...
## $ block : int 1 1 1 1 1 2 2 2 2 2 ...
## $ line : int 7 7 7 7 7 32 32 32 32 32 ...
od <- rep(1:15, 31)
datura <- datura %>% select(1,2) %>% cbind(od)
datura_wide <- spread(datura,strains,weight)
datura_wide <- datura_wide[,-1] # remove seq col
datura_wide <- datura_wide[,-11] # remove Mock col
tumor_means <- apply(datura_wide, 2, mean) # calculate mean for each isolate
df1 <- as.data.frame(tumor_means) # convert tumor weight means to a dataframe
# motility
mot <- read.csv("motility.csv", header = TRUE, sep = ",")
a <- rep(1:3,30)
mot <- cbind(mot, a)
str(mot)
## 'data.frame': 90 obs. of 3 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ mot_48h: num 3.32 3.31 3.36 3.5 3.48 3.51 3.71 3.7 3.65 3.2 ...
## $ a : int 1 2 3 1 2 3 1 2 3 1 ...
mot_wide <- spread(mot,strains,mot_48h)[,-1]
mot_mean <- apply(mot_wide, 2, mean)
df2 <- as.data.frame(mot_mean)
# carbenicillin
cb <- read.csv("carbenicillin.csv", header = TRUE, sep = ",")
a <- rep(1:3,30)
cb <- cbind(cb, a)
str(cb)
## 'data.frame': 90 obs. of 3 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ cb100_D: num 2.1 1.9 2.1 2.7 2.5 2.4 2 2.2 2.1 2.1 ...
## $ a : int 1 2 3 1 2 3 1 2 3 1 ...
cb_wide <- spread(cb,strains,cb100_D)[,-1]
cb_mean <- apply(cb_wide, 2, mean)
df3 <- as.data.frame(cb_mean)
# chloramphenicol
chl <- read.csv("chloramphenicol.csv", header = TRUE, sep = ",")
a <- rep(1:3,30)
chl <- cbind(chl, a)
str(chl)
## 'data.frame': 90 obs. of 3 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ c30_D : num 2.4 2.5 2.5 0.8 1 1 1 1 0.9 1.1 ...
## $ a : int 1 2 3 1 2 3 1 2 3 1 ...
chl_wide <- spread(chl,strains,c30_D)[,-1]
chl_mean <- apply(chl_wide, 2, mean)
df4 <- as.data.frame(chl_mean)
# ciprofloxacin
cip <- read.csv("ciprofloxacin.csv", header = TRUE, sep = ",")
a <- rep(1:3,30)
cip <- cbind(cip, a)
str(cip)
## 'data.frame': 90 obs. of 3 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ cip_D : num 2.8 3 2.9 3 3.1 3 3 3.1 3.2 3.1 ...
## $ a : int 1 2 3 1 2 3 1 2 3 1 ...
cip_wide <- spread(cip,strains,cip_D)[,-1]
cip_mean <- apply(cip_wide, 2, mean)
df5 <- as.data.frame(cip_mean)
# erythromycin
ery <- read.csv("erythromycin.csv", header = TRUE, sep = ",")
a <- rep(1:3,30)
ery <- cbind(ery, a)
str(ery)
## 'data.frame': 90 obs. of 3 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ e15_D : num 1.1 1 1 0.8 0.7 0.6 1.2 1.2 1.3 1.1 ...
## $ a : int 1 2 3 1 2 3 1 2 3 1 ...
ery_wide <- spread(ery,strains,e15_D)[,-1]
ery_mean <- apply(ery_wide, 2, mean)
df6 <- as.data.frame(ery_mean)
# rifampin
rif <- read.csv("rifampin.csv", header = TRUE, sep = ",")
a <- rep(1:3,30)
rif <- cbind(rif, a)
str(rif)
## 'data.frame': 90 obs. of 3 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ rif_D : num 1.3 1.2 1.3 1.2 1.2 1.3 1.4 1.3 1.3 1.2 ...
## $ a : int 1 2 3 1 2 3 1 2 3 1 ...
rif_wide <- spread(rif,strains,rif_D)[,-1]
rif_mean <- apply(rif_wide, 2, mean)
df7 <- as.data.frame(rif_mean)
# tetracycline
tet <- read.csv("tetracycline.csv", header = TRUE, sep = ",")
a <- rep(1:3,30)
tet <- cbind(tet, a)
str(tet)
## 'data.frame': 90 obs. of 3 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ te30_D : num 3.4 3.5 3.3 2.3 2.2 2.3 3 2.9 3.2 3.1 ...
## $ a : int 1 2 3 1 2 3 1 2 3 1 ...
tet_wide <- spread(tet,strains,te30_D)[,-1]
tet_mean <- apply(tet_wide, 2, mean)
df8 <- as.data.frame(tet_mean)
# k84
k84 <- read.csv("k84.csv", header = TRUE, sep = ",")
a <- rep(1:3,30)
k84 <- cbind(k84, a)
str(k84)
## 'data.frame': 90 obs. of 3 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ k84_D : num 5.32 5.32 5.31 0 0 0 4.83 4.81 4.81 0 ...
## $ a : int 1 2 3 1 2 3 1 2 3 1 ...
k84_wide <- spread(k84,strains,k84_D)[,-1]
k84_mean <- apply(k84_wide, 2, mean)
df9 <- as.data.frame(k84_mean)
# growth rate in TSB medium
df_tsb <- b123 %>% select(sample,r)
a <- rep(1:3,30)
df_tsb <- cbind(df_tsb,a)
df_tsb <- spread(df_tsb,sample,r)[,-1]
tsb_mean <- apply(df_tsb, 2, mean)
df10 <- as.data.frame(tsb_mean)
# growthrate in AB medium
df_ab <- b123AB %>% select(sample,r)
a <- rep(1:3,30)
df_ab <- cbind(df_ab,a)
df_ab <- spread(df_ab,sample,r)[,-1]
ab_mean <- apply(df_ab, 2, mean)
df11 <- as.data.frame(ab_mean)
data <- data.frame(df1,df2,df3,df4,df5,df6,df7,df8,df9,df10,df11)
pairs(data) # this plot the scatterplot
cor(data, y=NULL, use = "everything",method = "pearson") # this calculate the correlation coefficient.
## tumor_means mot_mean cb_mean chl_mean cip_mean
## tumor_means 1.000000000 0.23328349 -0.001211271 0.10545578 0.624594285
## mot_mean 0.233283485 1.00000000 -0.378131100 0.08622513 0.282789088
## cb_mean -0.001211271 -0.37813110 1.000000000 0.04905959 0.159726609
## chl_mean 0.105455783 0.08622513 0.049059587 1.00000000 0.129254443
## cip_mean 0.624594285 0.28278909 0.159726609 0.12925444 1.000000000
## ery_mean 0.063868511 0.50268607 -0.498628993 0.28420663 0.247892920
## rif_mean -0.173951226 -0.06792575 -0.189566297 0.14371371 0.037897170
## tet_mean -0.023505754 0.03411365 -0.147679681 0.30934431 -0.007402748
## k84_mean 0.630016963 0.39820249 0.094606118 0.27968414 0.355152649
## tsb_mean 0.016747457 0.24116355 -0.196180138 0.26651058 0.396053474
## ab_mean -0.105901230 -0.18609084 0.378483941 -0.16637148 0.112979675
## ery_mean rif_mean tet_mean k84_mean tsb_mean
## tumor_means 0.06386851 -0.17395123 -0.023505754 0.63001696 0.01674746
## mot_mean 0.50268607 -0.06792575 0.034113651 0.39820249 0.24116355
## cb_mean -0.49862899 -0.18956630 -0.147679681 0.09460612 -0.19618014
## chl_mean 0.28420663 0.14371371 0.309344311 0.27968414 0.26651058
## cip_mean 0.24789292 0.03789717 -0.007402748 0.35515265 0.39605347
## ery_mean 1.00000000 0.27714443 0.518291458 0.17650719 0.29697600
## rif_mean 0.27714443 1.00000000 0.176496514 -0.16703135 0.31978656
## tet_mean 0.51829146 0.17649651 1.000000000 0.23195963 0.19962885
## k84_mean 0.17650719 -0.16703135 0.231959628 1.00000000 -0.02683117
## tsb_mean 0.29697600 0.31978656 0.199628849 -0.02683117 1.00000000
## ab_mean 0.05392768 0.23218571 0.076060884 -0.01817376 -0.13846775
## ab_mean
## tumor_means -0.10590123
## mot_mean -0.18609084
## cb_mean 0.37848394
## chl_mean -0.16637148
## cip_mean 0.11297967
## ery_mean 0.05392768
## rif_mean 0.23218571
## tet_mean 0.07606088
## k84_mean -0.01817376
## tsb_mean -0.13846775
## ab_mean 1.00000000
Means of csv files. They contain 6 antibiotics, motility, k84, growth rate in TSB and AB medium, virulence. These data can also be used for preparing inputs for association analysis DBGWAS. The following generate mean of csv files of two antibiotics s10, va30, which are not included in above codes
s10 va30
test <- va30 # s10 and va30; change three place in the code
# convert each table from long to wide
test$uni <- c(rep(1:3,10))
str(test)
## 'data.frame': 90 obs. of 3 variables:
## $ strains: chr "But001" "But001" "But001" "But002" ...
## $ VA30 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ uni : int 1 2 3 1 2 3 1 2 3 1 ...
new <- spread(test,strains, VA30) %>% select(2:31) # delete the column added as unique index; change VA30
# calculate avg for each column
davg <- apply(new, 2, mean)
new2 <- rbind(new,davg)
new3 <- new2[-c(1:3), ] # keep avg
new3 <- data.frame(t(new3))
# save average value to a csv file for input preparation of association analysis
#write.table(new3, file = "/Users/dklabuser/limin/data_analysis/associationAnalysis/K-mer_association/cDBGWAS/agro30/strains30/va30.csv", row.names = TRUE, col.name = FALSE,sep = ",") # change save file each time.
Principal components analysis http://factominer.free.fr/factomethods/principal-components-analysis.html
#install.packages("FactoMineR")
Run PCA analysis of all phenotypic traits to see which are the most important pheno traits
# virulence- walnut tumor;
walnut_tumor <- read.table("walnut_tumor.csv", header = TRUE, sep = ",")
walnut_tumor <- walnut_tumor[order(walnut_tumor$strains),] # reorder df alphabetically
od <- rep(1:6, 31)
walnutumor <- walnut_tumor %>% cbind(od)
walnut_wide <- spread(walnutumor, strains, weight)
walnut_wide <- walnut_wide[,-1]
walnut_wide <- walnut_wide[,-11]
walnutumor_means <- apply(walnut_wide, 2, mean)
df0 <- as.data.frame(walnutumor_means)
# In total 12 pheno trains were includes: datura tumor, walnut tumor, mot, k84, AB, TSB, 6 antibiotic resistance data
data <- data.frame(df0,df1,df2,df3,df4,df5,df6,df7,df8,df9,df10,df11)
pairs(data) # plot the scatterplot
cor(data, y=NULL, use = "everything",method = "pearson") # calculate the correlation coefficient.
## walnutumor_means tumor_means mot_mean cb_mean
## walnutumor_means 1.00000000 0.314189468 0.01885661 0.073969694
## tumor_means 0.31418947 1.000000000 0.23328349 -0.001211271
## mot_mean 0.01885661 0.233283485 1.00000000 -0.378131100
## cb_mean 0.07396969 -0.001211271 -0.37813110 1.000000000
## chl_mean -0.09112783 0.105455783 0.08622513 0.049059587
## cip_mean 0.13225305 0.624594285 0.28278909 0.159726609
## ery_mean -0.10328689 0.063868511 0.50268607 -0.498628993
## rif_mean -0.03243981 -0.173951226 -0.06792575 -0.189566297
## tet_mean -0.39370056 -0.023505754 0.03411365 -0.147679681
## k84_mean -0.11143967 0.630016963 0.39820249 0.094606118
## tsb_mean -0.08788979 0.016747457 0.24116355 -0.196180138
## ab_mean 0.03689631 -0.105901230 -0.18609084 0.378483941
## chl_mean cip_mean ery_mean rif_mean tet_mean
## walnutumor_means -0.09112783 0.132253046 -0.10328689 -0.03243981 -0.393700559
## tumor_means 0.10545578 0.624594285 0.06386851 -0.17395123 -0.023505754
## mot_mean 0.08622513 0.282789088 0.50268607 -0.06792575 0.034113651
## cb_mean 0.04905959 0.159726609 -0.49862899 -0.18956630 -0.147679681
## chl_mean 1.00000000 0.129254443 0.28420663 0.14371371 0.309344311
## cip_mean 0.12925444 1.000000000 0.24789292 0.03789717 -0.007402748
## ery_mean 0.28420663 0.247892920 1.00000000 0.27714443 0.518291458
## rif_mean 0.14371371 0.037897170 0.27714443 1.00000000 0.176496514
## tet_mean 0.30934431 -0.007402748 0.51829146 0.17649651 1.000000000
## k84_mean 0.27968414 0.355152649 0.17650719 -0.16703135 0.231959628
## tsb_mean 0.26651058 0.396053474 0.29697600 0.31978656 0.199628849
## ab_mean -0.16637148 0.112979675 0.05392768 0.23218571 0.076060884
## k84_mean tsb_mean ab_mean
## walnutumor_means -0.11143967 -0.08788979 0.03689631
## tumor_means 0.63001696 0.01674746 -0.10590123
## mot_mean 0.39820249 0.24116355 -0.18609084
## cb_mean 0.09460612 -0.19618014 0.37848394
## chl_mean 0.27968414 0.26651058 -0.16637148
## cip_mean 0.35515265 0.39605347 0.11297967
## ery_mean 0.17650719 0.29697600 0.05392768
## rif_mean -0.16703135 0.31978656 0.23218571
## tet_mean 0.23195963 0.19962885 0.07606088
## k84_mean 1.00000000 -0.02683117 -0.01817376
## tsb_mean -0.02683117 1.00000000 -0.13846775
## ab_mean -0.01817376 -0.13846775 1.00000000
res.pca = PCA(data, scale.unit=TRUE, ncp=5, graph=T)
align phenotypic traits with pTi phylogeny trees. Here I include df0=walnut_tumor, df1=datura_tumor, df9=k84, df10=tsb, df11=ab. Because only 27 strains have pTi, so the non-virulent strains SJ003, But002, Tul002 were exluded from the above data frames.
# walnut tumor
name <- rownames(df0)
d0 <- data.frame(name)
dfvt <- data.frame(d0,df0)
dfwt <- dfvt %>% filter(!(name %in% c("But002", "SJ003", "Tul002")))
# datura tumor
ndf1 <- rownames(df1)
d1 <- data.frame(ndf1)
dfdatura <- data.frame(d1,df1)
dfdt <- dfdatura %>% filter(!(ndf1%in% c("But002", "SJ003", "Tul002")))
# dfnew <- data.frame(dfwt,dfdt)
# write.csv(dfnew, "/Users/dklabuser/limin/data_analysis/phenotypic_data/csv/opines_tumors.csv", row.names = FALSE)
# k84 test
nk84 <- rownames(df9)
d3 <- data.frame(nk84)
df84 <- data.frame(d3, df9)
dfk84 <- df84 %>% filter(!(nk84 %in% c("But002", "SJ003", "Tul002")))
# tsb
ntsb <- rownames(df10)
d4 <- data.frame(ntsb)
dftb <- data.frame(d4, df10)
dftsb <- dftb %>% filter(!(ntsb %in% c("But002", "SJ003", "Tul002")))
# ab
nab <- rownames(df11)
d5 <- data.frame(nab)
dfa <- data.frame(d5, df11)
dfab <- dfa %>% filter(!(nab %in% c("But002", "SJ003", "Tul002")))
check number of T6SS effectors with phenotypic traits Here I include df0=walnut_tumor, df1=datura_tumor, df9=k84, df10=tsb, df11=ab
strain = c("C58","But001","But002","CL001","Kin001","Kin002","Kin003","SJ003","Sta001","Sta004","Sta005","Tul002","Yol001","Yub002","Teh002")
num = c(17,17,11,14,13,19,13,11,10,16,15,11,11,16,11)
dft6es <- data.frame(strain, num)
rn <- dft6es[,1]
dt6ss <- data.frame(dft6es[,2])
row.names(dt6ss) <- rn
# walnut tumor
name <- rownames(df0)
#d0 <- data.frame(name)
#dfvt <- data.frame(d0,df0)
dfwt_t6s <- df0 %>% filter(name %in% c("C58","But001","But002","CL001","Kin001","Kin002","Kin003","SJ003","Sta001","Sta004","Sta005","Tul002","Yol001","Yub002","Teh002"))
# datura tumor
ndf1 <- rownames(df1)
#d1 <- data.frame(ndf1)
#dfdatura <- data.frame(d1,df1)
dfdt_t6s <- df1 %>% filter(ndf1%in% c("C58","But001","But002","CL001","Kin001","Kin002","Kin003","SJ003","Sta001","Sta004","Sta005","Tul002","Yol001","Yub002","Teh002"))
str(dfdt)
## 'data.frame': 27 obs. of 2 variables:
## $ ndf1 : chr "But001" "C58" "CC001" "CL001" ...
## $ tumor_means: num 2.75 1.48 2.78 2.01 1.12 ...
# k84 test
nk84 <- rownames(df9)
#d3 <- data.frame(nk84)
#df84 <- data.frame(d3, df9)
dfk84_t6s <- df9 %>% filter(nk84 %in% c("C58","But001","But002","CL001","Kin001","Kin002","Kin003","SJ003","Sta001","Sta004","Sta005","Tul002","Yol001","Yub002","Teh002"))
# tsb
ntsb <- rownames(df10)
#d4 <- data.frame(ntsb)
#dftb <- data.frame(d4, df10)
dftsb_t6s <- df10 %>% filter(ntsb %in% c("C58","But001","But002","CL001","Kin001","Kin002","Kin003","SJ003","Sta001","Sta004","Sta005","Tul002","Yol001","Yub002","Teh002"))
# ab
nab <- rownames(df11)
#d5 <- data.frame(nab)
#dfa <- data.frame(d5, df11)
dfab_t6s <- df11 %>% filter(nab %in% c("C58","But001","But002","CL001","Kin001","Kin002","Kin003","SJ003","Sta001","Sta004","Sta005","Tul002","Yol001","Yub002","Teh002"))
data <- data.frame(dt6ss,dfwt_t6s,dfdt_t6s,dfk84_t6s,dftsb_t6s,dfab_t6s)
pairs(data) # this plot the scatterplot
cor(data, y=NULL, use = "everything",method = "pearson") # this calculate the correlation coefficient.
## dft6es...2. walnutumor_means tumor_means k84_mean
## dft6es...2. 1.0000000 -0.1236971 0.100580051 0.10815768
## walnutumor_means -0.1236971 1.0000000 0.674968554 0.37313326
## tumor_means 0.1005801 0.6749686 1.000000000 0.71347928
## k84_mean 0.1081577 0.3731333 0.713479284 1.00000000
## tsb_mean 0.1370875 0.1007635 -0.004739094 0.06174943
## ab_mean -0.4835712 0.1706941 -0.085508181 0.03592637
## tsb_mean ab_mean
## dft6es...2. 0.137087543 -0.48357115
## walnutumor_means 0.100763521 0.17069413
## tumor_means -0.004739094 -0.08550818
## k84_mean 0.061749429 0.03592637
## tsb_mean 1.000000000 -0.12295914
## ab_mean -0.122959139 1.00000000
ggsave("/Users/dklabuser/limin/data_analysis/T6SS/t6ss_walnut_agro/PCA_t6ss_pheno/cor_t6ss_pheno.png")
## Saving 7 x 5 in image
res.pca = PCA(data, scale.unit=TRUE, ncp=5, graph=T)
ggsave("/Users/dklabuser/limin/data_analysis/T6SS/t6ss_walnut_agro/PCA_t6ss_pheno/pcat6ss_pheno.png")
## Saving 7 x 5 in image
pTi phylogeney tree
tre <- read.tree("pTi_accessory_binary_genes.fa.newick")
n <- MRCA(tre, c("Tul003", "Teh002"))
tre <- root(tre,node = n)
tipName <- tre$tip.label
p <- ggtree(tre, layout = "rectangular") +
geom_tiplab(size=2)
#geom_tippoint(size=1.5)
# rotate the tree for T-DNA right border alignment.
p <- flip(p, 6, 34) %>% flip(19,32)
p
nt_sequence <- "RB_aln.fasta"
x <- readDNAStringSet(nt_sequence)
data1 = tidy_msa(nt_sequence)
p1 <- p + geom_facet(geom = geom_msa, data = data1, panel = "T-DNA_RB", font = NULL, color = "Chemistry_NT") +
xlim_tree(2)
p2 <- facet_plot(p1, panel = "walnut", data = dfwt, geom = geom_segment, aes(x=0, xend=walnutumor_means, y=y, yend=y), size=3, color="red") %>%
facet_plot(panel = "datura_tumor", data = dfdt, geom = geom_segment,aes(x=0, xend=tumor_means, y=y, yend=y),size=3,color="pink") %>%
facet_plot(panel = "datura_tumor", data = dfdt, geom = geom_segment,aes(x=0, xend=tumor_means, y=y, yend=y),size=3,color="magenta") %>%
facet_plot(panel = "K84_sensitivity", data = dfk84, geom = geom_segment,aes(x=0, xend=k84_mean, y=y, yend=y),size=3,color="blue") %>%
facet_plot(panel = "TSB_growthRate", data = dftsb, geom = geom_segment,aes(x=0, xend=tsb_mean, y=y, yend=y),size=3,color="magenta") %>%
facet_plot(panel = "AB_growthRate", data = dfab, geom = geom_segment,aes(x=0, xend=ab_mean, y=y, yend=y),size=3,color="blue")
p2
ggsave("/Users/dklabuser/limin/Dissertation_papers/dissertation/chapter4_genetic_diversity/RB_pheno.png")
## Saving 7 x 5 in image
Perform t-test on two samples
df_tumors <- read.csv("opines_tumors.csv")
str(df_tumors)
## 'data.frame': 23 obs. of 4 variables:
## $ name : chr "But001" "CC001" "Gle001" "Gle002" ...
## $ walnut: num 3.96 2.45 3.06 3.91 2.36 ...
## $ datura: num 2.75 2.78 1.12 2.6 1.46 ...
## $ group : chr "sus" "sus" "ags" "ags" ...
# evaluate if equal variance
var.test(df_tumors$walnut~ df_tumors$group)
##
## F test to compare two variances
##
## data: df_tumors$walnut by df_tumors$group
## F = 7.2965, num df = 10, denom df = 11, p-value = 0.002871
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 2.069524 26.740905
## sample estimates:
## ratio of variances
## 7.296462
# t-test
t.test(df_tumors$walnut~ df_tumors$group, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: df_tumors$walnut by df_tumors$group
## t = 2.0785, df = 12.491, p-value = 0.05889
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1310119 6.1237720
## sample estimates:
## mean in group ags mean in group sus
## 6.873727 3.877347
#t.test(df_tumors$walnut~ df_tumors$group, var.equal=TRUE)
# evaluate if true variance
var.test(df_tumors$datura~ df_tumors$group)
##
## F test to compare two variances
##
## data: df_tumors$datura by df_tumors$group
## F = 0.76724, num df = 10, denom df = 11, p-value = 0.6838
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2176158 2.8118753
## sample estimates:
## ratio of variances
## 0.7672418
# t-test
#t.test(df_tumors$datura~ df_tumors$group, var.equal=FALSE)
t.test(df_tumors$datura~ df_tumors$group, var.equal=TRUE)
##
## Two Sample t-test
##
## data: df_tumors$datura by df_tumors$group
## t = -3.0513, df = 21, p-value = 0.006066
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.5134349 -0.2866065
## sample estimates:
## mean in group ags mean in group sus
## 1.852618 2.752639
ggplot(df_tumors, aes(x=group, y= walnut)) +
geom_boxplot()
#geom_point()
ggsave("/Users/dklabuser/limin/Dissertation_papers/dissertation/chapter4_genetic_diversity/walnut_opine.png")
## Saving 7 x 5 in image
ggplot(df_tumors, aes(x=group, y= datura)) +
geom_boxplot(outlier.shape = NA) + # outlier.shape = NA to hide the outlier display
#geom_point() +
geom_text(x=2, y=4,label = "*", size=10, color="red") # add significant star
ggsave("/Users/dklabuser/limin/Dissertation_papers/dissertation/chapter4_genetic_diversity/datura_opine.png")
## Saving 7 x 5 in image
remove outliers
df_tumors <- read.csv("opines_tumors.csv")
ggplot(df_tumors, aes(x=group, y= datura)) +
geom_boxplot(outlier.shape = NA) +
#geom_point() +
geom_text(x=2, y=4,label = "*", size=10, color="red")
# t-test to check walnut and datura has significant difference on walnut tumor size caused by the same opine type.
df_opine <- read.csv("plants_opine_tumor.csv")
# agropine
df_ags <- df_opine[1:22,1:2]
# check if equal variance
var.test(df_ags$ags~df_ags$host1)
##
## F test to compare two variances
##
## data: df_ags$ags by df_ags$host1
## F = 0.021215, num df = 10, denom df = 10, p-value = 9.093e-07
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.005707794 0.078850563
## sample estimates:
## ratio of variances
## 0.02121468
t.test(df_ags$ags~df_ags$host1, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: df_ags$ags by df_ags$host1
## t = -3.6567, df = 10.424, p-value = 0.004122
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8.063858 -1.978360
## sample estimates:
## mean in group datura mean in group walnut
## 1.852618 6.873727
ggplot(df_ags, aes(x=host1, y= ags)) +
geom_boxplot(outlier.shape = NA) +
geom_text(x=1, y=6,label = "*", size=10, color="red") # add significant star
#geom_point()
ggsave("/Users/dklabuser/limin/Dissertation_papers/dissertation/chapter4_genetic_diversity/ags_plant.png")
## Saving 7 x 5 in image
# succinamopine
df_sus <- df_opine[,3:4]
var.test(df_sus$sus~df_sus$host2)
##
## F test to compare two variances
##
## data: df_sus$sus by df_sus$host2
## F = 0.20175, num df = 11, denom df = 11, p-value = 0.01328
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.05807971 0.70082374
## sample estimates:
## ratio of variances
## 0.2017514
t.test(df_sus$sus~df_sus$host2, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: df_sus$sus by df_sus$host2
## t = -2.1302, df = 15.265, p-value = 0.04981
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.248357999 -0.001058667
## sample estimates:
## mean in group datura mean in group walnut
## 2.752639 3.877347
ggplot(df_sus, aes(x=host2, y= sus)) +
geom_boxplot(outlier.shape = NA) + # outlier.shape = NA to hide the outlier display
#geom_point() +
geom_text(x=1, y=4,label = "*", size=10, color="red") # add significant star
ggsave("/Users/dklabuser/limin/Dissertation_papers/dissertation/chapter4_genetic_diversity/sus_plant.png")
## Saving 7 x 5 in image
tre30 <- read.tree("30Agro_coreGen.tre")
tree_rooted <- root.phylo(tre30, outgroup = "Yub001")
tipName <- tre$tip.label
tree_rooted
##
## Phylogenetic tree with 30 tips and 28 internal nodes.
##
## Tip labels:
## SJ001, SJ002, Sta003, Sta001, Tul001, Sut001, ...
## Node labels:
## 1.000, , 1.000, 0.982, 0.937, 1.000, ...
##
## Unrooted; includes branch lengths.
p30 <- ggtree(tree_rooted, layout = "rectangular") +
geom_tiplab(size=2)
p30
p1 <- facet_plot(p30+xlim_tree(0.3), panel = "walnut_tumor", data = dfvt, geom = geom_segment, aes(x=0, xend=walnutumor_means,y=y, yend=y),size=3,color="blue") %>%
facet_plot(panel = "datura_tumor", data = dfdatura, geom = geom_segment,aes(x=0, xend=tumor_means, y=y, yend=y),size=3,color="red") %>%
#facet_plot(panel = "K84_sensitivity", data = df84, geom = geom_segment,aes(x=0, xend=k84_mean, y=y, yend=y),size=3,color="blue") %>%
facet_plot(panel = "TSB_growthRate", data = dftb, geom = geom_segment,aes(x=0, xend=tsb_mean, y=y, yend=y),size=3,color="red") %>%
facet_plot(panel = "AB_growthRate", data = dfa, geom = geom_segment,aes(x=0, xend=ab_mean, y=y, yend=y),size=3,color="blue")
p1